bitkeeper revision 1.339.1.6 (3f12cffdzSdqoflJR3gfS-S45xcteA)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Mon, 14 Jul 2003 15:45:01 +0000 (15:45 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Mon, 14 Jul 2003 15:45:01 +0000 (15:45 +0000)
nmi.c:
  new file
Many files:
  NMI watchdog support in Xen.

.rootkeys
xen/arch/i386/apic.c
xen/arch/i386/entry.S
xen/arch/i386/io_apic.c
xen/arch/i386/nmi.c [new file with mode: 0644]
xen/arch/i386/setup.c
xen/arch/i386/traps.c
xen/include/asm-i386/hardirq.h
xen/include/asm-i386/msr.h
xen/include/xeno/irq_cpustat.h

index 9d6ec81b45d3ab8ea086f6e975ce4e589900afc3..f3ad76356cc8415da7399d5a06d3ca8f7a17ae72 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen/arch/i386/irq.c
 3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/i386/mm.c
 3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen/arch/i386/mpparse.c
+3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/i386/nmi.c
 3ddb79bcnL-_Dtsbtjgxl7vJU3vBiQ xen/arch/i386/pci-dma.c
 3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen/arch/i386/pci-i386.c
 3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/i386/pci-i386.h
index 78cfcc2c09d55ceac94ab3e0207ba9e8325f58bc..11d0c54532b995fc936ebc52033b4c02b4d210d2 100644 (file)
@@ -353,6 +353,9 @@ void __init setup_local_APIC (void)
     } else {
         printk("No ESR for 82489DX.\n");
     }
+
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               setup_apic_nmi_watchdog();
 }
 
 
@@ -413,6 +416,8 @@ static int __init detect_init_APIC (void)
     set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability);
     mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
     boot_cpu_physical_apicid = 0;
+       if (nmi_watchdog != NMI_NONE)
+               nmi_watchdog = NMI_LOCAL_APIC;
 
     printk("Found and enabled local APIC!\n");
     apic_pm_init1();
index b60b5022885a31ea55e363a4181b115dea822480..1460cf77edb06dfeced16fa3dbc5e7ace2cbf3cf 100644 (file)
@@ -131,7 +131,7 @@ CF_MASK             = 0x00000001
 IF_MASK                = 0x00000200
 NT_MASK                = 0x00004000
 
-#define SAVE_ALL \
+#define SAVE_ALL_NOSTI \
        cld; \
        pushl %gs; \
        pushl %fs; \
@@ -146,8 +146,11 @@ NT_MASK            = 0x00004000
        pushl %ebx; \
        movl $(__HYPERVISOR_DS),%edx; \
        movl %edx,%ds; \
-       movl %edx,%es; \
-        sti; 
+       movl %edx,%es;
+
+#define SAVE_ALL \
+       SAVE_ALL_NOSTI \
+       sti;
 
 #define RESTORE_ALL    \
        popl %ebx;      \
@@ -554,7 +557,7 @@ ENTRY(debug)
 
 ENTRY(nmi)
        pushl %eax
-       SAVE_ALL
+       SAVE_ALL_NOSTI
        movl %esp,%edx
        pushl $0
        pushl %edx
index 5935864c71e40392bae1a870c62dea8edfe46fe2..951763a0537fc8990974b2973ef1d22bf509d64d 100644 (file)
@@ -34,8 +34,6 @@
 
 #ifdef CONFIG_X86_IO_APIC
 
-static unsigned int nmi_watchdog;  /* XXXX XEN */
-
 #undef APIC_LOCKUP_DEBUG
 
 #define APIC_LOCKUP_DEBUG
@@ -1641,15 +1639,8 @@ static inline void check_timer(void)
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
                unmask_IO_APIC_irq(0);
-               if (timer_irq_works()) {
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               disable_8259A_irq(0);
-                               setup_nmi();
-                               enable_8259A_irq(0);
-                               // XXX Xen check_nmi_watchdog();
-                       }
+               if (timer_irq_works())
                        return;
-               }
                clear_IO_APIC_pin(0, pin1);
                printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
        }
@@ -1667,10 +1658,6 @@ static inline void check_timer(void)
                                replace_pin_at_irq(0, 0, pin1, 0, pin2);
                        else
                                add_pin_to_irq(0, 0, pin2);
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               setup_nmi();
-                               // XXX Xen check_nmi_watchdog();
-                       }
                        return;
                }
                /*
@@ -1680,11 +1667,6 @@ static inline void check_timer(void)
        }
        printk(" failed.\n");
 
-       if (nmi_watchdog) {
-               printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
-               nmi_watchdog = 0;
-       }
-
        printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 
        disable_8259A_irq(0);
diff --git a/xen/arch/i386/nmi.c b/xen/arch/i386/nmi.c
new file mode 100644 (file)
index 0000000..23fd691
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ *  linux/arch/i386/nmi.c
+ *
+ *  NMI watchdog support on APIC systems
+ *
+ *  Started by Ingo Molnar <mingo@redhat.com>
+ *
+ *  Fixes:
+ *  Mikael Pettersson  : AMD K7 support for local APIC NMI watchdog.
+ *  Mikael Pettersson  : Power Management for local APIC NMI watchdog.
+ *  Mikael Pettersson  : Pentium 4 support for local APIC NMI watchdog.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/lib.h>
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+
+#include <asm/mc146818rtc.h>
+#include <asm/smp.h>
+#include <asm/msr.h>
+#include <asm/mpspec.h>
+
+/* In this file Dprintk() debug output is sent straight to printk(). */
+#undef Dprintk
+#define Dprintk(x...) printk(x)
+
+/* Selected watchdog mode; initialised to the local-APIC-driven watchdog. */
+unsigned int nmi_watchdog = NMI_LOCAL_APIC;
+/*
+ * Watchdog rate used when computing the performance-counter reload value
+ * -(cpu_khz/nmi_hz*1000).  Starts at HZ; check_nmi_watchdog() lowers it
+ * to 1 when running in NMI_LOCAL_APIC mode.
+ */
+static unsigned int nmi_hz = HZ;
+unsigned int nmi_perfctr_msr;  /* the MSR to reset in NMI handler */
+extern void show_registers(struct pt_regs *regs);
+
+/*
+ * AMD K7: bits and event code that setup_k7_watchdog() ORs together to
+ * form the value it writes to MSR_K7_EVNTSEL0.
+ */
+#define K7_EVNTSEL_ENABLE      (1 << 22)
+#define K7_EVNTSEL_INT         (1 << 20)
+#define K7_EVNTSEL_OS          (1 << 17)
+#define K7_EVNTSEL_USR         (1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING   0x76
+#define K7_NMI_EVENT           K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+/*
+ * Intel P6: bits and event code that setup_p6_watchdog() ORs together to
+ * form the value it writes to MSR_P6_EVNTSEL0.
+ */
+#define P6_EVNTSEL0_ENABLE     (1 << 22)
+#define P6_EVNTSEL_INT         (1 << 20)
+#define P6_EVNTSEL_OS          (1 << 17)
+#define P6_EVNTSEL_USR         (1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
+#define P6_NMI_EVENT           P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+/*
+ * Pentium 4: MSR numbers and ESCR/CCCR field encodings used by
+ * setup_p4_watchdog() and by the P4 path of nmi_watchdog_tick().
+ */
+#define MSR_P4_MISC_ENABLE     0x1A0
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL  (1<<7)
+#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL        (1<<12)
+#define MSR_P4_PERFCTR0                0x300
+#define MSR_P4_CCCR0           0x360
+#define P4_ESCR_EVENT_SELECT(N)        ((N)<<25)
+#define P4_ESCR_OS             (1<<3)
+#define P4_ESCR_USR            (1<<2)
+#define P4_CCCR_OVF_PMI                (1<<26)
+#define P4_CCCR_THRESHOLD(N)   ((N)<<20)
+#define P4_CCCR_COMPLEMENT     (1<<19)
+#define P4_CCCR_COMPARE                (1<<18)
+#define P4_CCCR_REQUIRED       (3<<16)
+#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
+#define P4_CCCR_ENABLE         (1<<12)
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+   CRU_ESCR0 (with any non-null event selector) through a complemented
+   max threshold. [IA32-Vol3, Section 14.9.9] */
+#define MSR_P4_IQ_COUNTER0     0x30C
+#define MSR_P4_IQ_CCCR0                0x36C
+#define MSR_P4_CRU_ESCR0       0x3B8
+#define P4_NMI_CRU_ESCR0       (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
+#define P4_NMI_IQ_CCCR0        \
+       (P4_CCCR_OVF_PMI|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|      \
+        P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+
+/*
+ * Boot-time sanity check of the NMI watchdog.
+ *
+ * Snapshots every CPU's NMI count, enables interrupts, busy-waits for
+ * (10*1000)/nmi_hz milliseconds and then prints, per CPU, whether more
+ * than five NMIs were counted during that window.  Afterwards, when the
+ * watchdog runs in NMI_LOCAL_APIC mode, nmi_hz is lowered to 1.
+ *
+ * Always returns 0; the outcome of the test is only reported via printk().
+ */
+int __init check_nmi_watchdog (void)
+{
+    unsigned int prev_nmi_count[NR_CPUS];
+    int j, cpu;
+
+    printk("testing NMI watchdog ---\n");
+
+    /* Take the snapshot through nmi_count(), the same accessor that the
+       comparison below uses, rather than poking irq_stat[] directly. */
+    for (j = 0; j < smp_num_cpus; j++) {
+        cpu = cpu_logical_map(j);
+        prev_nmi_count[cpu] = nmi_count(cpu);
+    }
+    sti();
+    mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
+
+    for (j = 0; j < smp_num_cpus; j++) {
+        cpu = cpu_logical_map(j);
+        /* Unsigned difference, so a counter wrap does not break the test. */
+        if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
+            printk("CPU#%d: NMI stuck? (Hyperthread secondary CPU?)\n", cpu);
+        else
+            printk("CPU#%d: NMI okay\n", cpu);
+    }
+
+    /* now that we know it works we can reduce NMI frequency to
+       something more reasonable; makes a difference in some configs */
+    if (nmi_watchdog == NMI_LOCAL_APIC)
+        nmi_hz = 1;
+
+    return 0;
+}
+
+/* Hook called at the end of setup_apic_nmi_watchdog(); an empty stub here. */
+static inline void nmi_pm_init(void) { }
+/* The watchdog setup routines are tagged __pminit, which is plain __init here. */
+#define __pminit       __init
+
+/*
+ * Activate the NMI watchdog via the local APIC.
+ * Original code written by Keith Owens.
+ */
+
+/*
+ * Write zero to each of the n consecutive MSRs starting at base.
+ */
+static void __pminit clear_msr_range(unsigned int base, unsigned int n)
+{
+    unsigned int reg = base;
+    const unsigned int end = base + n;
+
+    while (reg != end) {
+        wrmsr(reg, 0, 0);
+        reg++;
+    }
+}
+
+/*
+ * Program the watchdog on performance counter 0 of an AMD CPU (called by
+ * setup_apic_nmi_watchdog() for AMD families 6 and 15).
+ *
+ * The event select is first written without K7_EVNTSEL_ENABLE, the counter
+ * is loaded with -(cpu_khz/nmi_hz*1000), APIC_LVTPC is set to APIC_DM_NMI,
+ * and only then is the event select rewritten with the enable bit set.
+ */
+static void __pminit setup_k7_watchdog(void)
+{
+    unsigned int evntsel;
+
+    /* Remembered so nmi_watchdog_tick() knows which counter to reload. */
+    nmi_perfctr_msr = MSR_K7_PERFCTR0;
+
+    /* Zero four event-select and four counter MSRs before programming. */
+    clear_msr_range(MSR_K7_EVNTSEL0, 4);
+    clear_msr_range(MSR_K7_PERFCTR0, 4);
+
+    evntsel = K7_EVNTSEL_INT
+        | K7_EVNTSEL_OS
+        | K7_EVNTSEL_USR
+        | K7_NMI_EVENT;
+
+    wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+    Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
+    wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+    apic_write(APIC_LVTPC, APIC_DM_NMI);
+    /* Counter and LVTPC are in place; now switch the counter on. */
+    evntsel |= K7_EVNTSEL_ENABLE;
+    wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+}
+
+/*
+ * Program the watchdog on performance counter 0 of an Intel P6-family CPU
+ * (called by setup_apic_nmi_watchdog() for Intel family 6).
+ *
+ * Same sequence as the K7 variant: event select written without the enable
+ * bit, counter loaded with -(cpu_khz/nmi_hz*1000), APIC_LVTPC set to
+ * APIC_DM_NMI, then the event select rewritten with P6_EVNTSEL0_ENABLE.
+ */
+static void __pminit setup_p6_watchdog(void)
+{
+    unsigned int evntsel;
+
+    /* Remembered so nmi_watchdog_tick() knows which counter to reload. */
+    nmi_perfctr_msr = MSR_P6_PERFCTR0;
+
+    /* Zero two event-select and two counter MSRs before programming. */
+    clear_msr_range(MSR_P6_EVNTSEL0, 2);
+    clear_msr_range(MSR_P6_PERFCTR0, 2);
+
+    evntsel = P6_EVNTSEL_INT
+        | P6_EVNTSEL_OS
+        | P6_EVNTSEL_USR
+        | P6_NMI_EVENT;
+
+    wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+    Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
+    /* NOTE(review): the last wrmsr() argument is 0 here, whereas the K7 and
+       P4 setup paths and nmi_watchdog_tick() pass -1 -- confirm that this
+       difference is intended for P6. */
+    wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
+    apic_write(APIC_LVTPC, APIC_DM_NMI);
+    /* Counter and LVTPC are in place; now switch the counter on. */
+    evntsel |= P6_EVNTSEL0_ENABLE;
+    wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+}
+
+/*
+ * Program the watchdog on a Pentium 4 using IQ_COUNTER0, IQ_CCCR0 and
+ * CRU_ESCR0 (called by setup_apic_nmi_watchdog() for Intel family 15).
+ *
+ * Returns 0, before writing any MSR, when MSR_P4_MISC_ENABLE does not have
+ * MSR_P4_MISC_ENABLE_PERF_AVAIL set; returns 1 once the counter has been
+ * programmed and enabled.
+ */
+static int __pminit setup_p4_watchdog(void)
+{
+    unsigned int misc_enable, dummy;
+
+    rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+    if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
+        return 0;
+
+    /* Remembered so nmi_watchdog_tick() knows which counter to reload. */
+    nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
+
+    /* MSRs 0x3F1-0x3F2 are only cleared when the PEBS_UNAVAIL bit is clear. */
+    if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
+        clear_msr_range(0x3F1, 2);
+    /* MSR 0x3F0 seems to have a default value of 0xFC00, but the current
+       docs don't fully define it, so leave it alone for now. */
+    clear_msr_range(0x3A0, 31);
+    clear_msr_range(0x3C0, 6);
+    clear_msr_range(0x3C8, 6);
+    clear_msr_range(0x3E0, 2);
+    clear_msr_range(MSR_P4_CCCR0, 18);
+    clear_msr_range(MSR_P4_PERFCTR0, 18);
+
+    wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
+    /* Configure the CCCR with P4_CCCR_ENABLE masked out until the counter
+       value and LVTPC have been written. */
+    wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
+    Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
+    wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+    apic_write(APIC_LVTPC, APIC_DM_NMI);
+    /* Final write includes P4_CCCR_ENABLE. */
+    wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
+    return 1;
+}
+
+/*
+ * Pick the watchdog setup routine matching the boot CPU's vendor and
+ * family and run it.  nmi_pm_init() is called only when a routine was
+ * actually run and, for the P4 case, reported success; unsupported
+ * vendors or families leave everything untouched.
+ */
+void __pminit setup_apic_nmi_watchdog (void)
+{
+    const int vendor = boot_cpu_data.x86_vendor;
+    const int family = boot_cpu_data.x86;
+    int armed = 0;
+
+    if (vendor == X86_VENDOR_AMD) {
+        /* AMD: only families 6 and 15 are handled. */
+        if (family == 6 || family == 15) {
+            setup_k7_watchdog();
+            armed = 1;
+        }
+    } else if (vendor == X86_VENDOR_INTEL) {
+        if (family == 6) {
+            setup_p6_watchdog();
+            armed = 1;
+        } else if (family == 15) {
+            /* The P4 setup may decline; it returns 0 in that case. */
+            armed = setup_p4_watchdog();
+        }
+    }
+
+    if (!armed)
+        return;
+
+    nmi_pm_init();
+}
+
+
+/*
+ * Per-CPU watchdog state maintained by nmi_watchdog_tick():
+ *   last_irq_sums - the apic_timer_irqs[] value recorded when it last changed
+ *   alert_counter - watchdog ticks seen since then without a change; also
+ *                   cleared by touch_nmi_watchdog()
+ */
+static unsigned int
+last_irq_sums [NR_CPUS],
+    alert_counter [NR_CPUS];
+
+/*
+ * Reset the lockup alert counter of the first smp_num_cpus entries, so
+ * that the countdown towards a reported lockup starts over.
+ */
+void touch_nmi_watchdog (void)
+{
+    int idx = 0;
+
+    while (idx < smp_num_cpus) {
+        alert_counter[idx] = 0;
+        idx++;
+    }
+}
+
+/*
+ * Per-NMI watchdog work for the current CPU (called from do_nmi()).
+ *
+ * Compares this CPU's apic_timer_irqs[] count with the value recorded in
+ * last_irq_sums[].  If it has not moved, alert_counter[] is incremented and,
+ * on reaching 5*nmi_hz, console_lock is forcibly reset and die() is called.
+ * If it has moved, the new value is recorded and the alert counter cleared.
+ * Finally, when a performance-counter MSR was registered in
+ * nmi_perfctr_msr, that counter is reloaded for the next period.
+ *
+ * regs: register state at the time of the NMI, passed on to die().
+ */
+void nmi_watchdog_tick (struct pt_regs * regs)
+{
+    extern spinlock_t console_lock;
+    extern void die(const char * str, struct pt_regs * regs, long err);
+    /* NOTE(review): putchar_serial is declared but not used below. */
+    extern void putchar_serial(unsigned char c);
+
+    int sum, cpu = smp_processor_id();
+
+    sum = apic_timer_irqs[cpu];
+    
+    if (last_irq_sums[cpu] == sum) {
+        /*
+         * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds) 
+         * before doing the oops ...
+         */
+        alert_counter[cpu]++;
+        if (alert_counter[cpu] == 5*nmi_hz) {
+            /* Reset the console lock unconditionally before reporting. */
+            console_lock = SPIN_LOCK_UNLOCKED;
+            die("NMI Watchdog detected LOCKUP on CPU", regs, cpu);
+        }
+    } else {
+        last_irq_sums[cpu] = sum;
+        alert_counter[cpu] = 0;
+    }
+
+    if (nmi_perfctr_msr) {
+        if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
+            /*
+             * P4 quirks:
+             * - An overflown perfctr will assert its interrupt
+             *   until the OVF flag in its CCCR is cleared.
+             * - LVTPC is masked on interrupt and must be
+             *   unmasked by the LVTPC handler.
+             */
+            wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
+            apic_write(APIC_LVTPC, APIC_DM_NMI);
+        }
+        /* Re-arm the counter with the same value the setup routines use. */
+        wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+    }
+}
index bbd2d65603826cfed2f3a8bb92d14a3fb7a6a780..1372c7b403d20399aba93ce186f590ad021c91a7 100644 (file)
@@ -148,7 +148,8 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
             c->x86_capability[2] = cpuid_edx(0x80860001);
     }
 
-    printk("CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
+    printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
+           smp_processor_id(),
            c->x86_capability[0],
            c->x86_capability[1],
            c->x86_capability[2],
@@ -345,13 +346,15 @@ void __init start_of_day(void)
 #endif
     initialize_keytable(); /* call back handling for key codes      */
 
-       disable_pit();          /* not needed anymore */
-       ac_timer_init();    /* init accurate timers */
-       init_xeno_time();       /* initialise the time */
-       schedulers_start(); /* start scheduler for each CPU */
+    disable_pit();             /* not needed anymore */
+    ac_timer_init();    /* init accurate timers */
+    init_xeno_time();  /* initialise the time */
+    schedulers_start(); /* start scheduler for each CPU */
 
     sti();
 
+    check_nmi_watchdog();
+
     zap_low_mappings();
     kmem_cache_init();
     kmem_cache_sizes_init(max_page);
@@ -369,7 +372,6 @@ void __init start_of_day(void)
     net_init();            /* initializes virtual network system. */
     initialize_block_io(); /* setup block devices */
 
-
 #ifdef CONFIG_SMP
     wait_init_idle = cpu_online_map;
     clear_bit(smp_processor_id(), &wait_init_idle);
index 8ca6b90ccffc74633c1aa1048033fa92ba7c3924..66e52c24568ee79b36c46657da8a489f24f675e5 100644 (file)
@@ -470,7 +470,15 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
 {
     unsigned char reason = inb(0x61);
 
+    ++nmi_count(smp_processor_id());
+
     if (!(reason & 0xc0)) {
+#if CONFIG_X86_LOCAL_APIC
+        if (nmi_watchdog) {
+            nmi_watchdog_tick(regs);
+            return;
+        }
+#endif
         unknown_nmi_error(reason, regs);
         return;
     }
index f0a9024dcd72f27a6d69f92ab23f0eda07bf7a26..9afe12e2ab829f5c8a64c5bb9cc085eedc4fde88 100644 (file)
@@ -10,6 +10,7 @@ typedef struct {
        unsigned int __local_irq_count;
        unsigned int __local_bh_count;
        unsigned int __syscall_count;
+       unsigned int __nmi_count;
        unsigned long idle_timestamp;
 } ____cacheline_aligned irq_cpustat_t;
 
index 11bcb7f29e4216189bd0cd889ee797182ae51985..45ec765e6ec6e0cd821a55d41b7213c45cd09725 100644 (file)
 #define MSR_IA32_UCODE_WRITE           0x79
 #define MSR_IA32_UCODE_REV             0x8b
 
-#define MSR_IA32_PERFCTR0              0xc1
-#define MSR_IA32_PERFCTR1              0xc2
-
 #define MSR_IA32_BBL_CR_CTL            0x119
 
 #define MSR_IA32_MCG_CAP               0x179
 #define MSR_IA32_MCG_STATUS            0x17a
 #define MSR_IA32_MCG_CTL               0x17b
 
-#define MSR_IA32_EVNTSEL0              0x186
-#define MSR_IA32_EVNTSEL1              0x187
+#define MSR_IA32_THERM_CONTROL         0x19a
+#define MSR_IA32_THERM_INTERRUPT       0x19b
+#define MSR_IA32_THERM_STATUS          0x19c
+#define MSR_IA32_MISC_ENABLE           0x1a0
 
 #define MSR_IA32_DEBUGCTLMSR           0x1d9
 #define MSR_IA32_LASTBRANCHFROMIP      0x1db
 #define MSR_IA32_MC0_ADDR              0x402
 #define MSR_IA32_MC0_MISC              0x403
 
+#define MSR_P6_PERFCTR0                        0xc1
+#define MSR_P6_PERFCTR1                        0xc2
+#define MSR_P6_EVNTSEL0                        0x186
+#define MSR_P6_EVNTSEL1                        0x187
+
 /* AMD Defined MSRs */
 #define MSR_K6_EFER                    0xC0000080
 #define MSR_K6_STAR                    0xC0000081
 #define MSR_K6_WHCR                    0xC0000082
 #define MSR_K6_UWCCR                   0xC0000085
+#define MSR_K6_EPMR                    0xC0000086
 #define MSR_K6_PSOR                    0xC0000087
 #define MSR_K6_PFIR                    0xC0000088
 
 #define MSR_K7_EVNTSEL0                        0xC0010000
 #define MSR_K7_PERFCTR0                        0xC0010004
+#define MSR_K7_HWCR                    0xC0010015
+#define MSR_K7_CLK_CTL                 0xC001001b
+#define MSR_K7_FID_VID_CTL             0xC0010041
+#define MSR_K7_VID_STATUS              0xC0010042
 
 /* Centaur-Hauls/IDT defined MSRs. */
 #define MSR_IDT_FCR1                   0x107
 
 /* VIA Cyrix defined MSRs*/
 #define MSR_VIA_FCR                    0x1107
+#define MSR_VIA_LONGHAUL               0x110a
+#define MSR_VIA_BCR2                   0x1147
+
+/* Transmeta defined MSRs */
+#define MSR_TMTA_LONGRUN_CTRL          0x80868010
+#define MSR_TMTA_LONGRUN_FLAGS         0x80868011
+#define MSR_TMTA_LRTI_READOUT          0x80868018
+#define MSR_TMTA_LRTI_VOLT_MHZ         0x8086801a
 
 #endif /* __ASM_MSR_H */
index 646655403a7273c1077834e468f61b2db04ac3ca..589283bffe3b8d83c707f6123cba5c45b7e24307 100644 (file)
@@ -30,5 +30,6 @@ extern irq_cpustat_t irq_stat[];                      /* defined in asm/hardirq.h */
 #define local_irq_count(cpu)   __IRQ_STAT((cpu), __local_irq_count)
 #define local_bh_count(cpu)    __IRQ_STAT((cpu), __local_bh_count)
 #define syscall_count(cpu)     __IRQ_STAT((cpu), __syscall_count)
+#define nmi_count(cpu)         __IRQ_STAT((cpu), __nmi_count)
 
 #endif /* __irq_cpustat_h */